In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
# Load the cleaned housing dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='cleaned_india_housing_prices.csv')

# Select the "notebook_connected" plotly renderer for every figure shown below.
pio.renderers.default = "notebook_connected"
In [2]:
# Preview the first five rows to sanity-check the load and the column names.
df.head(n=5)
Out[2]:
| | id | state | city | locality | property_type | bhk | size_in_sqft | price_in_lakhs | price_per_sqft | year_built | ... | age_of_property | nearby_schools | nearby_hospitals | public_transport_accessibility | parking_space | security | amenities | facing | owner_type | availability_status |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Tamil Nadu | Chennai | Locality_84 | Apartment | 1 | 4740 | 489.76 | 10332.489451 | 1990 | ... | 35 | 10 | 3 | High | No | No | Playground, Gym, Garden, Pool, Clubhouse | West | Owner | Ready_to_Move |
| 1 | 2 | Maharashtra | Pune | Locality_490 | Independent House | 3 | 2364 | 195.52 | 8270.727580 | 2008 | ... | 17 | 8 | 1 | Low | No | Yes | Playground, Clubhouse, Pool, Gym, Garden | North | Builder | Under_Construction |
| 2 | 3 | Punjab | Ludhiana | Locality_167 | Apartment | 2 | 3642 | 183.79 | 5046.403075 | 1997 | ... | 28 | 9 | 8 | Low | Yes | No | Clubhouse, Pool, Playground, Gym | South | Broker | Ready_to_Move |
| 3 | 4 | Rajasthan | Jodhpur | Locality_393 | Independent House | 2 | 2741 | 300.29 | 10955.490697 | 1991 | ... | 34 | 5 | 7 | High | Yes | Yes | Playground, Clubhouse, Gym, Pool, Garden | North | Builder | Ready_to_Move |
| 4 | 5 | Rajasthan | Jaipur | Locality_466 | Villa | 4 | 4823 | 182.90 | 3792.245490 | 2002 | ... | 23 | 4 | 9 | Low | No | Yes | Playground, Garden, Gym, Pool, Clubhouse | East | Builder | Ready_to_Move |
5 rows × 23 columns
In [3]:
# Histogram of listing prices (50 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['price_in_lakhs'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Property Prices (in Lakhs)')
ax.set_xlabel('Price (Lakhs)')
Out[3]:
Text(0.5, 0, 'Price (Lakhs)')
In [4]:
# Histogram of property sizes (50 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['size_in_sqft'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Property Size (sqft)')
Out[4]:
Text(0.5, 1.0, 'Distribution of Property Size (sqft)')
In [5]:
# Box plot of price per square foot, one box per property type.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='property_type', y='price_per_sqft', ax=ax)
# Tilt the category labels by 45 degrees; `ax` is still the current axes,
# so the pyplot call acts on it.
plt.xticks(rotation=45)
Out[5]:
([0, 1, 2], [Text(0, 0, 'Apartment'), Text(1, 0, 'Independent House'), Text(2, 0, 'Villa')])
In [6]:
# Interactive scatter of price against size, one colour per city; hovering a
# point also shows its locality and property type.
fig = px.scatter(
    df,
    x='size_in_sqft',
    y='price_in_lakhs',
    color='city',
    hover_data=['locality', 'property_type'],
    title='Property Price vs. Size by City',
    labels={
        'size_in_sqft': 'Size (sqft)',
        'price_in_lakhs': 'Price (Lakhs)',
    },
)
# No plt.tight_layout() here: it is a matplotlib call, has no effect on a
# plotly figure, and would make pyplot create an empty matplotlib figure.
fig.show()